/*
Merge pik-year spine (ehf_icf_spine) with 
- pik-year plant capacities (all_styr_100p)
- pik-year capacity IVs (all_piks_ivs_100p)
*/

* define libraries;
libname hannah '/projects/users/########/Snapshot2022/IntermediateData';
libname hantmp '/projects/users/########/Snapshot2022/IntermediateData/TempData';
libname ans '/projects/users/########/Snapshot2022/AnalysisData';


* should be already sorted, but in case not;
proc sort data = hannah.ehf_icf_spine;
  by pik year;
run;

proc sort data = hannah.all_styr_100p;
  by pik year;
run;

proc sort data = hannah.allpiks_ivs100p;
  by pik year;
run;

/* 
* should have already been done in MakeWorkerSpine2022.sas;
data interm.ehf_icf_spine;
  set interm.ehf_icf_spine;
  if age < 18 then delete;
  if age > 65 then delete;
run;
*/

* don't keep years in which people may have lived in other states; 
* could technically get employment status, but let's confine sample to our sample states; 
data ans.ehf_icf_analysis;
  set hannah.ehf_icf_spine (where=(substr([county id],1,2) in ("01" "04" "05" "06" "08" "10" "11" "17" "18" "19" "20" "23" "24" "29" "30" "31" "32" "35" "38" "40" "47" "51" "56")));
run;

* Now do the merge to the plant data; 
proc sql;
  create table ans.ehf_icf_analysis
  as select
  a.*, b.pik, b.year, b.plantcap100, b.plantcap80, b.plantcap60, b.plantcap40, b.plantcap20
  from ans.ehf_icf_analysis as a
    left join hannah.all_styr_100p as b
    on a.pik = b.pik and a.year = b.year
  order by pik, year;
quit;

* Fill in missing plant capacities with zero - if missing pik doesn't live within 100 miles;
data ans.ehf_icf_analysis;
  set ans.ehf_icf_analysis;
  if missing(plantcap20) then plantcap20 = 0;
  if missing(plantcap40) then plantcap40 = 0;
  if missing(plantcap60) then plantcap60 = 0;
  if missing(plantcap80) then plantcap80 = 0;
  if missing(plantcap100) then plantcap100 = 0;
run;



* Merge the capacity IVs. Need to check that whether missing is missing or should be zero.;
proc sql;
  create table ans.ehf_icf_analysis
  as select
  a.*, b.pik, b.year,  
  b.yhattwfe100 as twfehat100, b.twfehat80, b.twfehat60, b.twfehat40, b.twfehat20,
  b.yhatqps100 as qpshat100, b.qpshat80, b.qpshat60, b.qpshat40, b.qpshat20
  from ans.ehf_icf_analysis as a
    left join hannah.allpiks_ivs100p as b
    on a.pik = b.pik and a.year = b.year
  order by pik, year;
quit;
/*
* check correlations between IVs and endogenous;
proc corr data = hannah.ehf_icf_states_plants;
  var caphat20 turbhat20 zhatcnty20;
  with plantcap20;
run;
*/


/* Previous, but keeps piks for years when they didn't live in our 23 states, and we don't have plantcap for those.
proc sql;
  create table hannah.ehf_icf_analysis
  as select
  a.*, b.pik, b.year, b.plantcap100, b.plantcap80, b.plantcap60, b.plantcap40, b.plantcap20
  from hannah.ehf_icf_spine as a
    left join hannah.all_styr_100p as b
    on a.pik = b.pik and a.year = b.year
  order by pik, year;
quit;
*Another way of doing it;
data hannah.ehf_icf_analysis;
  merge hannah.ehf_icf_spine hannah.all_styr_100p (keep = pik year plantcap100 plantcap80 plantcap60 plantcap40 plantcap20);
  by pik year;
run;
*/

* Now fill in pik-years that didnt get a zhat (for now replace with 0 but PikPanel_Xmile_IVs23.sas will get predicted values outside 100 miles);
/*
data hantmp.missingz;
  set hannah.ehf_icf_analysis;
  where zhatcnty100 is missing or 
	zhatcnty80 is missing or
	zhatcnty60 is missing or
	zhatcnty40 is missing or
	zhatcnty20 is missing or
	caphat100 is missing or 
	caphat80 is missing or
	caphat60 is missing or
	caphat40 is missing or
	caphat20 is missing;
run;

data hannah.ehf_icf_analysis;
  set hannah.ehf_icf_analysis;
  if missing(zhatcnty100) then zhatcnty100 = 0;
  if missing(zhatcnty80) then zhatcnty80 = 0;
  if missing(zhatcnty60) then zhatcnty60 = 0;
  if missing(zhatcnty40) then zhatcnty40 = 0;
  if missing(zhatcnty20) then zhatcnty20 = 0;
  if missing(caphat100) then caphat100 = 0;
  if missing(caphat80) then caphat80 = 0;
  if missing(caphat60) then caphat60 = 0;
  if missing(caphat40) then caphat40 = 0;
  if missing(caphat20) then caphat20 = 0;
run;
*/

* Spine can be rebuilt in one day - no big deal to delete;
proc datasets library = hannah nolist;
  delete ehf_icf_spine;
run; quit;


